CVC Data Summaries

Setup the basic working environment


In [ ]:
%matplotlib inline

import os
import sys
import datetime
import warnings

import numpy as np
import matplotlib.pyplot as plt
import pandas
import seaborn
seaborn.set(style='ticks', context='paper')

import wqio
import pybmpdb
import pynsqd

import pycvc

min_precip = 1.9999
palette = seaborn.color_palette('deep', n_colors=6)
pybmpdb.setMPLStyle()
POCs = [p['cvcname'] for p in filter(lambda p: p['include'], pycvc.info.POC_dicts)]

if wqio.testing.checkdep_tex() is None:
    warnings.warn("LaTeX not found on system path. You will not be able to compile ISRs to PDF files", UserWarning)

Hydrologic Relationships

$V_{\mathrm{runoff, \ LV1}} = \max\left(0,\: -12.05 + 2.873\, D_{\mathrm{precip}} + 0.863 \, \Delta t \right)$


In [ ]:
def LV1_runoff(row):
    return max(0, -12.0 + 2.87 * row['total_precip_depth'] + 0.863 * row['duration_hours'])

ED-1

$\log \left(V_{\mathrm{runoff, \ ED1}}\right) = 1.58 + 0.000667 \, I_{\mathrm{max}} + 0.0169 \, D_{\mathrm{precip}} $

$V_{\mathrm{bypass, \ ED1}} = \max \left(0,\: -26.4 + 0.184 \, I_{\mathrm{max}} + 1.22 \, D_{\mathrm{precip}} \right)$

$V_{\mathrm{inflow, \ ED1}} = \max \left(0,\: V_{\mathrm{runoff, \ ED1}} - V_{\mathrm{bypass, \ ED1}} \right)$


In [ ]:
def ED1_runoff(row):
    return 10**(1.58 + 0.000667 * row['peak_precip_intensity'] + 0.0169 * row['total_precip_depth'] )

def ED1_bypass(row):
    return max(0, -26.4 + 0.184 * row['peak_precip_intensity'] + 1.22 * row['total_precip_depth'])

def ED1_inflow(row):
    return max(0, ED1_runoff(row) - ED1_bypass(row))

LV-2

$\log \left(V_{\mathrm{runoff, \ LV2}}\right) = 1.217 + 0.00622 \, I_{\mathrm{max}} + 0.0244 \, D_{\mathrm{precip}} $

$V_{\mathrm{bypass, \ LV2}} = 0$

$V_{\mathrm{inflow, \ LV2}} = \max \left(0,\: V_{\mathrm{runoff, \ LV2}} - V_{\mathrm{bypass, \ LV2}} \right)$


In [ ]:
def LV2_runoff(row):
    return 10**(1.22 + 0.00622 * row['peak_precip_intensity'] + 0.0244 * row['total_precip_depth'] )

def LV2_bypass(row):
    return 0

def LV2_inflow(row):
    return max(0, LV2_runoff(row) - LV2_bypass(row))

LV-4

$\log \left(V_{\mathrm{runoff, \ LV4}}\right) = 1.35 + 0.00650 \, I_{\mathrm{max}} + 0.00940 \, D_{\mathrm{precip}} $

$V_{\mathrm{bypass, \ LV4}} = \max \left(0,\: 7.37 + 0.0370 \, I_{\mathrm{max}} + 0.112 \, D_{\mathrm{precip}} \right)$

$V_{\mathrm{inflow, \ LV4}} = \max \left(0,\: V_{\mathrm{runoff, \ LV4}} - V_{\mathrm{bypass, \ LV4}} \right)$


In [ ]:
def LV4_runoff(row):
    return 10**(1.35 + 0.00650 * row['peak_precip_intensity'] + 0.00940 * row['total_precip_depth'] )

def LV4_bypass(row):
    return max(0, 7.36 + 0.0370 * row['peak_precip_intensity'] + 0.112 * row['total_precip_depth'])

def LV4_inflow(row):
    return max(0, LV4_runoff(row) - LV4_bypass(row))

Water quality loading relationship

$ M_{\mathrm{runoff}} = V_{\mathrm{runoff}} \times \hat{\mathbb{C}}_{\mathrm{inflow}}\left(\mathrm{landuse,\ season}\right) $

$ M_{\mathrm{bypass}} = V_{\mathrm{bypass}} \times \hat{\mathbb{C}}_{\mathrm{inflow}}\left(\mathrm{landuse,\ season}\right) $

$ M_{\mathrm{inflow}} = M_{\mathrm{runoff}} - M_{\mathrm{bypass}} $

$ M_{\mathrm{outflow}} = V_{\mathrm{outflow}} \times \mathbb{C}_{\mathrm{outflow}} $

Load External Data (this takes a while)


In [ ]:
bmpdb = pycvc.external.bmpdb(palette[3], 'D')
nsqdata = pycvc.external.nsqd(palette[2], 'd')

Load CVC Database


In [ ]:
cvcdbfile = "C:/users/phobson/Desktop/cvc.accdb"
cvcdb = pycvc.Database(cvcdbfile, nsqdata, bmpdb, testing=False)

Define the site object for the reference site and compute its median values ("influent" to other sites)


In [ ]:
LV1 = pycvc.Site(db=cvcdb, siteid='LV-1', raingauge='LV-1', tocentry='Lakeview Control', 
                 isreference=True, influentmedians=pycvc.wqstd_template(),
                 runoff_fxn=LV1_runoff, minprecip=min_precip,
                 color=palette[1], marker='s')

Lakeview BMP sites get their "influent" data from LV-1


In [ ]:
LV_Influent = (
    LV1.wqdata
       .query("sampletype == 'composite'")
       .groupby(by=['season', 'parameter', 'units'])['concentration']
       .median()
       .reset_index()
       .rename(columns={'concentration': 'influent median'}) 
)
LV_Influent.head()

Elm Drive's "influent" data come from NSQD


In [ ]:
ED_Influent = (
    cvcdb.nsqdata
         .medians.copy()
         .rename(columns={'NSQD Medians': 'influent median'})
)
ED_Influent.head()

Remaining site objects


In [ ]:
ED1 = pycvc.Site(db=cvcdb, siteid='ED-1', raingauge='ED-1',
                 tocentry='Elm Drive', influentmedians=ED_Influent, 
                 minprecip=min_precip, isreference=False,
                 runoff_fxn=ED1_runoff, bypass_fxn=ED1_bypass,
                 inflow_fxn=ED1_inflow, color=palette[0], marker='o')

LV2 = pycvc.Site(db=cvcdb, siteid='LV-2', raingauge='LV-1',
                 tocentry='Lakeview Grass Swale', influentmedians=LV_Influent, 
                 minprecip=min_precip, isreference=False,
                 runoff_fxn=LV2_runoff, bypass_fxn=LV2_bypass,
                 inflow_fxn=LV2_inflow, color=palette[4], marker='^')

LV4 = pycvc.Site(db=cvcdb, siteid='LV-4', raingauge='LV-1',
                 tocentry=r'Lakeview Bioswale 1$^{\mathrm{st}}$ South Side', 
                 influentmedians=LV_Influent, 
                 minprecip=min_precip, isreference=False,
                 runoff_fxn=LV4_runoff, bypass_fxn=LV4_bypass,
                 inflow_fxn=LV4_inflow, color=palette[5], marker='v')

Fix ED-1 storm that had two composite samples


In [ ]:
ED1.hydrodata.data.loc['2012-08-10 23:50:00':'2012-08-11 05:20', 'storm'] = 0
ED1.hydrodata.data.loc['2012-08-11 05:30':, 'storm'] += 1

Replace total inflow volume with estimate from simple method for 2013-07-08 storm


In [ ]:
storm_date = datetime.date(2013, 7, 8)
for site in [ED1, LV1, LV2, LV4]:
    bigstorm = site.storm_info.loc[site.storm_info.start_date.dt.date == storm_date].index[0]
    inflow = site.drainagearea.simple_method(site.storm_info.loc[bigstorm, 'total_precip_depth'])
    site.storm_info.loc[bigstorm, 'inflow_m3'] = inflow
    site.storm_info.loc[bigstorm, 'runoff_m3'] = np.nan
    site.storm_info.loc[bigstorm, 'bypass_m3'] = np.nan

High-level summaries

Hydrologic Summary


In [ ]:
stormfile = pandas.ExcelWriter("output/xlsx/CVCHydro_StormInfo.xlsx")
hydrofile = pandas.ExcelWriter("output/xlsx/CVCHydro_StormStats.xlsx")

with pandas.ExcelWriter("output/xlsx/CVCHydro_StormStats.xlsx") as hydrofile,\
     pandas.ExcelWriter("output/xlsx/CVCHydro_StormInfo.xlsx") as stormfile: 
    for site in [ED1, LV1, LV2, LV4]:
        site.storm_info.to_excel(stormfile, sheet_name=site.siteid)
        site.storm_stats().to_excel(hydrofile, sheet_name=site.siteid)

Hydrologic Pairplots

(failing on LV-2's outflow is expected)


In [ ]:
for site in [ED1, LV2, LV4]:
    for by in ['year', 'outflow', 'season']:
        try:               
            site.hydro_pairplot(by=by)
        except:
            print('failed on {}, {}'.format(site, by))

Prevalence Tables


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_DataInventory.xlsx') as prev_tables:
    for site in [ED1, LV1, LV2, LV4]:
        stype = 'composite'
        site.prevalence_table()[stype].to_excel(prev_tables, sheet_name='{}'.format(site.siteid))

Concentrations Stats


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_ConcStats.xlsx') as concfile:
    for site in [ED1, LV1, LV2, LV4]:
        concs = site.wq_summary('concentration', sampletype='composite').T
        concs.to_excel(concfile, sheet_name=site.siteid, na_rep='--')

Load Stats


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_LoadStats.xlsx') as loadstats:
    for site in [ED1, LV1, LV2, LV4]:
        load = (
            site.wq_summary('load_outflow', sampletype='composite')
                .stack(level='parameter')
                .stack(level='load_units')
        )
        load.to_excel(loadstats, sheet_name=site.siteid, na_rep='--')

Tidy Data


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_TidyData.xlsx') as tidyfile:
    for site in [ED1, LV1, LV2, LV4]:
        site.tidy_data.to_excel(tidyfile, sheet_name=site.siteid, na_rep='--')

Total Loads Summary


In [ ]:
with pandas.ExcelWriter('output/xlsx/CVCWQ_LoadTotals.xlsx') as loadfile:
    for site in [ED1, LV1, LV2, LV4]:
        loads = site.load_totals(sampletype='composite')
        loads.to_excel(loadfile, sheet_name=site.siteid, na_rep='--')

Analysis


In [ ]:
seaborn.set(style='ticks', context='paper')
pybmpdb.setMPLStyle()

Individual Storm Reports

(requires $\LaTeX$)


In [ ]:
for site in [ED1, LV1, LV2, LV4]:
    print('\n----Compiling ISR for {0}----'.format(site.siteid))
    site.allISRs('composite', version='draft')

Precip-outflow scatter plots


In [ ]:
for site in [ED1, LV1, LV2, LV4]:
    print('\n----Summarizing {0}----'.format(site.siteid))
    
    site.hydro_jointplot(
        xcol='total_precip_depth', 
        ycol='outflow_mm', 
        conditions="outflow_mm > 0", 
        one2one=True
    )

    site.hydro_jointplot(
        xcol='antecedent_days', 
        ycol='outflow_mm', 
        conditions="outflow_mm > 0", 
        one2one=False
    )

    site.hydro_jointplot(
        xcol='total_precip_depth', 
        ycol='antecedent_days', 
        conditions="outflow_mm == 0", 
        one2one=False
    )
    
    site.hydro_jointplot(
        xcol='peak_precip_intensity', 
        ycol='peak_outflow', 
        conditions=None, 
        one2one=False
    )
    
    plt.close('all')

WQ Comparison

Lists of sites to compare


In [ ]:
site_lists = [
    [ED1],
    [LV1, LV2, LV4],
]

Individual Figures


In [ ]:
for sl in site_lists:
    print('\n----Comparing {}----'.format(', '.join([s.siteid for s in sl])))
    for poc in POCs:
        print('  ' + poc)
        
        wqcomp = pycvc.summary.WQComparison(sl, 'composite', poc, nsqdata, bmpdb)
        
        wqcomp.seasonalBoxplots(load=False, finalOutput=True)
        wqcomp.seasonalBoxplots(load=True, finalOutput=True)
        
        wqcomp.landuseBoxplots(finalOutput=True)
        wqcomp.bmpCategoryBoxplots(finalOutput=True)
        
        wqcomp.parameterStatPlot(finalOutput=True)
        wqcomp.parameterStatPlot(load=True, finalOutput=True)
        
        wqcomp.parameterTimeSeries(finalOutput=True)  
        wqcomp.parameterTimeSeries(load=True, finalOutput=True)  

        plt.close('all')

Megafigures


In [ ]:
for sl in site_lists:
    print('\n----Megafigs with {}----'.format(', '.join([s.siteid for s in sl])))
    
    # construct the megafigures
    mf1 = pycvc.summary.WQMegaFigure(sl, 'composite', POCs[:6], 1, nsqdata, bmpdb)
    mf2 = pycvc.summary.WQMegaFigure(sl, 'composite', POCs[6:], 2, nsqdata, bmpdb)
    for n, mf in enumerate([mf1, mf2]):
        print('\tTime Series {0}'.format(n+1))
        mf.timeseriesFigure(load=False)
        mf.timeseriesFigure(load=True)

        print('\tStat plots {0}'.format(n+1))
        mf.statplotFigure(load=False)
        mf.statplotFigure(load=True)

        print('\tBMPDB Boxplots {0}'.format(n+1))
        mf.bmpCategoryBoxplotFigure()

        print('\tNSQD Boxplots {0}'.format(n+1))
        mf.landuseBoxplotFigure()

        print('\tSeasonal Boxplots {0}'.format(n+1))
        mf.seasonalBoxplotFigure(load=False)
        mf.seasonalBoxplotFigure(load=True)
     
    plt.close('all')

Unsampled loading estimates

Warning: Site objects (e.g., ED1) have hidden _unsampled_loading_estimates methods that return load estimates of unsampled storms using the estimated median influent concentrations and median effluent concentrations. However, it is highly recommended that you aggregate the data and don't draw conclusions about individual storms.

The cell below aggregates the data for each parameter, season, and whether the storms produced outflow. The results (sums) are then saved to an Excel file, one tab for each site.


In [ ]:
cols = [
    'duration_hours', 'total_precip_depth_mm', 
    'runoff_m3', 'bypass_m3', 'inflow_m3', 'outflow_m3', 
    'load_runoff', 'load_bypass', 'load_inflow', 'load_outflow',
]

with pandas.ExcelWriter("output/xlsx/CVCHydro_UnsampledLoadEstimates.xlsx") as unsampled_file:
    for site in [ED1, LV1, LV2, LV4]:
        loads = (
            site._unsampled_load_estimates()
                .groupby(['season', 'has_outflow', 'parameter', 'load_units'])
                .sum()
                .select(lambda c: c in cols, axis=1)
                .reset_index()
            )
        loads.to_excel(unsampled_file, sheet_name=site.siteid)